Clear workspace
rm(list = ls())
library(bigrquery)
library(stringr)
library(tidyverse)
library(dplyr)
library(lme4)
library(MuMIn)
library(scales)
response <- try(system('~/google-cloud-sdk/bin/gcloud projects list --quiet', intern = T))
projectid <- strsplit(response[2], " ")[[1]][1]
create_dataset <- function(poolname) {
sql <- str_replace_all("SELECT
##POOL_NAME##.percentage_of_regional_pool_present,
##POOL_NAME##.difference_from_locality_trait_gravity,
##POOL_NAME##.percentage_of_niches_present,
##POOL_NAME##.percentage_of_niches_2_present,
##POOL_NAME##.percentage_of_niches_3_present,
latitude,
longitude,
percentage_landcover_5km.closed_forest_total AS closed_forest,
percentage_landcover_5km.cultivated,
percentage_landcover_5km.herbaceous_vegetation,
percentage_landcover_5km.herbaceous_wetland,
percentage_landcover_5km.open_forest_total AS open_forest,
percentage_landcover_5km.permanent_water,
percentage_landcover_5km.shrubs,
percentage_landcover_5km.urban,
percentage_landcover_5km.elevation.mean AS mean_elevation,
percentage_landcover_5km.elevation.delta AS elevation_delta,
average_population_density.within_5km AS average_population_density,
urban_area.name AS city_name,
urban_area.location.continent,
urban_area.ecosystem.realm,
urban_area.ecosystem.biome.biome_name AS biome,
urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,
urban_area.country_economy.income_group AS national_income_group,
locality_id
FROM model.urban_hotspot
JOIN model2.all_species USING(locality_id, city_id)
JOIN model.urban_area USING (city_id)", '##POOL_NAME##', poolname)
print(sql)
tb <- bq_project_query(projectid, sql)
bq_table_download(tb)
}
load_dataset <- function(poolname) {
filename <- str_replace('hotspot_metrics_##POOL_NAME##.csv', '##POOL_NAME##', poolname)
if (!file.exists(filename)) {
data <- create_dataset(poolname)
write_csv(data, filename)
}
data <- read_csv(filename)
data$city_name = as.factor(data$city_name)
data$continent = relevel(as.factor(data$continent), ref = "Europe")
data$realm = relevel(as.factor(data$realm), ref = "Palearctic")
data$biome = as.factor(data$biome)
data$national_income_group = as.factor(data$national_income_group)
data$mean_elevation_scaled = rescale(data$mean_elevation, to = c(0, 1), from = range(data$mean_elevation, na.rm = TRUE, finite = TRUE))
data$elevation_delta_scaled = rescale(data$elevation_delta, to = c(0, 1), from = range(data$elevation_delta, na.rm = TRUE, finite = TRUE))
data$average_population_density_scaled = rescale(data$average_population_density, to = c(0, 1), from = range(data$average_population_density, na.rm = TRUE, finite = TRUE))
data$national_gdp_estimate_thousand_dollars_per_person_scaled = rescale(data$national_gdp_estimate_thousand_dollars_per_person, to = c(0, 1), from = range(data$national_gdp_estimate_thousand_dollars_per_person, na.rm = TRUE, finite = TRUE))
data$latitude_scaled = rescale(data$latitude, to = c(0, 1), from = range(data$latitude, na.rm = TRUE, finite = TRUE))
data$longitude_scaled = rescale(data$longitude, to = c(0, 1), from = range(data$longitude, na.rm = TRUE, finite = TRUE))
data$absolute_latitude_scaled = abs(data$latitude_scaled)
data
}
merlin <- load_dataset('merlin')
[1] "SELECT \n merlin.percentage_of_regional_pool_present,\n merlin.difference_from_locality_trait_gravity,\n merlin.percentage_of_niches_present,\n merlin.percentage_of_niches_2_present,\n merlin.percentage_of_niches_3_present,\n latitude,\n longitude,\n percentage_landcover_5km.closed_forest_total AS closed_forest,\n percentage_landcover_5km.cultivated,\n percentage_landcover_5km.herbaceous_vegetation,\n percentage_landcover_5km.herbaceous_wetland,\n percentage_landcover_5km.open_forest_total AS open_forest,\n percentage_landcover_5km.permanent_water,\n percentage_landcover_5km.shrubs,\n percentage_landcover_5km.urban,\n percentage_landcover_5km.elevation.mean AS mean_elevation,\n percentage_landcover_5km.elevation.delta AS elevation_delta,\n average_population_density.within_5km AS average_population_density,\n urban_area.name AS city_name,\n urban_area.location.continent,\n urban_area.ecosystem.realm,\n urban_area.ecosystem.biome.biome_name AS biome,\n urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,\n urban_area.country_economy.income_group AS national_income_group,\n locality_id\nFROM model.urban_hotspot\nJOIN model2.all_species USING(locality_id, city_id)\nJOIN model.urban_area USING (city_id)"
Running job 'endless-matter-297214.job_spoeUbAlTzdKLDzFKsc29dNnzDNT.EU' [-] 1s
Running job 'endless-matter-297214.job_spoeUbAlTzdKLDzFKsc29dNnzDNT.EU' [\] 1s
Running job 'endless-matter-297214.job_spoeUbAlTzdKLDzFKsc29dNnzDNT.EU' [|] 2s
Running job 'endless-matter-297214.job_spoeUbAlTzdKLDzFKsc29dNnzDNT.EU' [/] 2s
Running job 'endless-matter-297214.job_spoeUbAlTzdKLDzFKsc29dNnzDNT.EU' [-] 2s
Complete
Billed: 62.91 MB
Downloading 8,443 rows in 1 pages.
Downloading data [===============================================================================================================================================================] 100% ETA: 0s
Parsing [=============================================================================================================================================================================] ETA: 0s
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
city_name = col_character(),
continent = col_character(),
realm = col_character(),
biome = col_character(),
national_income_group = col_character(),
locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
merlin
birdlife <- load_dataset('birdlife')
[1] "SELECT \n birdlife.percentage_of_regional_pool_present,\n birdlife.difference_from_locality_trait_gravity,\n birdlife.percentage_of_niches_present,\n birdlife.percentage_of_niches_2_present,\n birdlife.percentage_of_niches_3_present,\n latitude,\n longitude,\n percentage_landcover_5km.closed_forest_total AS closed_forest,\n percentage_landcover_5km.cultivated,\n percentage_landcover_5km.herbaceous_vegetation,\n percentage_landcover_5km.herbaceous_wetland,\n percentage_landcover_5km.open_forest_total AS open_forest,\n percentage_landcover_5km.permanent_water,\n percentage_landcover_5km.shrubs,\n percentage_landcover_5km.urban,\n percentage_landcover_5km.elevation.mean AS mean_elevation,\n percentage_landcover_5km.elevation.delta AS elevation_delta,\n average_population_density.within_5km AS average_population_density,\n urban_area.name AS city_name,\n urban_area.location.continent,\n urban_area.ecosystem.realm,\n urban_area.ecosystem.biome.biome_name AS biome,\n urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,\n urban_area.country_economy.income_group AS national_income_group,\n locality_id\nFROM model.urban_hotspot\nJOIN model2.all_species USING(locality_id, city_id)\nJOIN model.urban_area USING (city_id)"
Running job 'endless-matter-297214.job_vxoxrOFexVLAp6orqCnzvG1KVo8T.EU' [-] 1s
Running job 'endless-matter-297214.job_vxoxrOFexVLAp6orqCnzvG1KVo8T.EU' [\] 1s
Running job 'endless-matter-297214.job_vxoxrOFexVLAp6orqCnzvG1KVo8T.EU' [|] 2s
Running job 'endless-matter-297214.job_vxoxrOFexVLAp6orqCnzvG1KVo8T.EU' [/] 2s
Running job 'endless-matter-297214.job_vxoxrOFexVLAp6orqCnzvG1KVo8T.EU' [-] 2s
Complete
Billed: 62.91 MB
Downloading 8,443 rows in 1 pages.
Downloading data [===============================================================================================================================================================] 100% ETA: 0s
Parsing [=============================================================================================================================================================================] ETA: 0s
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
city_name = col_character(),
continent = col_character(),
realm = col_character(),
biome = col_character(),
national_income_group = col_character(),
locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
birdlife
both <- load_dataset('both')
[1] "SELECT \n both.percentage_of_regional_pool_present,\n both.difference_from_locality_trait_gravity,\n both.percentage_of_niches_present,\n both.percentage_of_niches_2_present,\n both.percentage_of_niches_3_present,\n latitude,\n longitude,\n percentage_landcover_5km.closed_forest_total AS closed_forest,\n percentage_landcover_5km.cultivated,\n percentage_landcover_5km.herbaceous_vegetation,\n percentage_landcover_5km.herbaceous_wetland,\n percentage_landcover_5km.open_forest_total AS open_forest,\n percentage_landcover_5km.permanent_water,\n percentage_landcover_5km.shrubs,\n percentage_landcover_5km.urban,\n percentage_landcover_5km.elevation.mean AS mean_elevation,\n percentage_landcover_5km.elevation.delta AS elevation_delta,\n average_population_density.within_5km AS average_population_density,\n urban_area.name AS city_name,\n urban_area.location.continent,\n urban_area.ecosystem.realm,\n urban_area.ecosystem.biome.biome_name AS biome,\n urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,\n urban_area.country_economy.income_group AS national_income_group,\n locality_id\nFROM model.urban_hotspot\nJOIN model2.all_species USING(locality_id, city_id)\nJOIN model.urban_area USING (city_id)"
Running job 'endless-matter-297214.job_WCJj3mTt7ZVFGKZrhuqWjKjwNLMw.EU' [-] 1s
Running job 'endless-matter-297214.job_WCJj3mTt7ZVFGKZrhuqWjKjwNLMw.EU' [\] 1s
Running job 'endless-matter-297214.job_WCJj3mTt7ZVFGKZrhuqWjKjwNLMw.EU' [|] 2s
Running job 'endless-matter-297214.job_WCJj3mTt7ZVFGKZrhuqWjKjwNLMw.EU' [/] 2s
Running job 'endless-matter-297214.job_WCJj3mTt7ZVFGKZrhuqWjKjwNLMw.EU' [-] 2s
Complete
Billed: 62.91 MB
Downloading 8,443 rows in 1 pages.
Downloading data [===============================================================================================================================================================] 100% ETA: 0s
Parsing [=============================================================================================================================================================================] ETA: 0s
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
city_name = col_character(),
continent = col_character(),
realm = col_character(),
biome = col_character(),
national_income_group = col_character(),
locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
both
either <- load_dataset('either')
[1] "SELECT \n either.percentage_of_regional_pool_present,\n either.difference_from_locality_trait_gravity,\n either.percentage_of_niches_present,\n either.percentage_of_niches_2_present,\n either.percentage_of_niches_3_present,\n latitude,\n longitude,\n percentage_landcover_5km.closed_forest_total AS closed_forest,\n percentage_landcover_5km.cultivated,\n percentage_landcover_5km.herbaceous_vegetation,\n percentage_landcover_5km.herbaceous_wetland,\n percentage_landcover_5km.open_forest_total AS open_forest,\n percentage_landcover_5km.permanent_water,\n percentage_landcover_5km.shrubs,\n percentage_landcover_5km.urban,\n percentage_landcover_5km.elevation.mean AS mean_elevation,\n percentage_landcover_5km.elevation.delta AS elevation_delta,\n average_population_density.within_5km AS average_population_density,\n urban_area.name AS city_name,\n urban_area.location.continent,\n urban_area.ecosystem.realm,\n urban_area.ecosystem.biome.biome_name AS biome,\n urban_area.country_economy.gdp_estimate_thousand_dollars_per_person AS national_gdp_estimate_thousand_dollars_per_person,\n urban_area.country_economy.income_group AS national_income_group,\n locality_id\nFROM model.urban_hotspot\nJOIN model2.all_species USING(locality_id, city_id)\nJOIN model.urban_area USING (city_id)"
Running job 'endless-matter-297214.job_KRI4oEEnQ-77VUVV7pjbkXxCqETp.EU' [-] 1s
Running job 'endless-matter-297214.job_KRI4oEEnQ-77VUVV7pjbkXxCqETp.EU' [\] 1s
Running job 'endless-matter-297214.job_KRI4oEEnQ-77VUVV7pjbkXxCqETp.EU' [|] 2s
Running job 'endless-matter-297214.job_KRI4oEEnQ-77VUVV7pjbkXxCqETp.EU' [/] 2s
Running job 'endless-matter-297214.job_KRI4oEEnQ-77VUVV7pjbkXxCqETp.EU' [-] 2s
Complete
Billed: 62.91 MB
Downloading 8,443 rows in 1 pages.
Downloading data [===============================================================================================================================================================] 100% ETA: 0s
Parsing [=============================================================================================================================================================================] ETA: 0s
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
.default = col_double(),
city_name = col_character(),
continent = col_character(),
realm = col_character(),
biome = col_character(),
national_income_group = col_character(),
locality_id = col_character()
)
ℹ Use `spec()` for the full column specifications.
either
city_data <- read_csv('city_data.csv')
── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
cols(
name = col_character(),
population = col_double(),
total_area = col_double(),
closed_forest = col_double(),
cultivated = col_double(),
herbaceous_vegetation = col_double(),
herbaceous_wetland = col_double(),
open_forest = col_double(),
permanent_water = col_double(),
shrubs = col_double(),
urban = col_double(),
realm = col_character()
)
city_data$population_scaled = rescale(city_data$population, to = c(0, 1), from = range(city_data$population, na.rm = TRUE, finite = TRUE))
city_data$area_scaled = rescale(city_data$total_area, to = c(0, 1), from = range(city_data$total_area, na.rm = TRUE, finite = TRUE))
city_data$realm <- as.factor(city_data$realm)
city_data